// PART 1 PRODUCES RESULTS FOR TABLES 3-6 AND FIGURES 1-3

clear
cd "\\file\UsersW$\wrr15\Home\My Documents\My Files\XINDONG XUE\META-ANALYSIS\REVISION FOR JHE\DATA AND CODE"
set more off
set type double
graph drop _all
program drop _all
set scheme s2mono
log using "Part1 Results(20191130).smcl", replace
use "sc_data(20191130).dta"

******************************************************************************
*DEFINITION OF BASIC VARIABLES
******************************************************************************
gen tstat = real(tstatistics)
gen negative = 1
replace negative = -1 if ((badhealth == 1 & badsc == 0) | (badhealth == 0 & badsc == 1))
replace tstat = tstat*negative
gen df = nobs
gen obsno = _n
replace df = . if tstat == .
gen r = tstat/sqrt(tstat^2+df)
gen varR = (1-r^2)/df
gen seR = sqrt(varR)
gen pcc = r

// Study Type
describe journal
gen journal2 = journal
encode journal, gen(njournal)
drop journal
gen journal=(njournal != .)
summ pubyear
replace pubyear = pubyear - r(mean)


// Countries
gen eastasia=(country_region=="East_Asia")
gen usa=(country_region=="USA")
gen westnortheurope=(country_region=="West_North_Europe")
gen highincome=(country_region=="High-income countries")
gen othercountry = 1 - eastasia - usa - westnortheurope - highincome

// Social Capital Measures
gen cognitivestructual2 = cognitivestructual
replace cognitivestructual = (cognitivestructual2 == 0 | cognitivestructual2 == 1)
gen bondingbridginglinking2 = bondingbridginglinking
replace bondingbridginglinking = (bondingbridginglinking2 == 0 | bondingbridginglinking2 == 1 | bondingbridginglinking2 == 2)
gen both = (cognitivestructual == 1 & bondingbridginglinking == 1)

// Estimation Method
gen otherestimation = 1 - ols - fgls - probitlogit - orderedprobitlogit - orhr - hlm - iv 

// Calculation of t-statistic
gen tcalculatedbyci=(tcalculatedbycise==1 & cilowerbound != .)
gen tcalculatedbypvalue2 = tcalculatedbypvalue
replace tcalculatedbypvalue = (tcalculatedbypvalue2 == 1)
gen tnormal = 1 - tcalculatedbyci - tcalculatedbypvalue



*************************************
*************************************
*************************************
************ TABLE 3 ****************
*************************************
*************************************
*************************************

******************************************************************************
* Distribution of t-stats, df, and PCCs before kicking out outliers
******************************************************************************
summ tstat, detail
summ df, detail 
summ pcc, detail

******************************************************************************
* Kick out extreme values of PCC
******************************************************************************
quietly summ pcc, detail
keep if pcc > r(p1) & pcc < r(p99) 

summ tstat, detail
summ df, detail 
summ pcc, detail

*************************************
*************************************
*************************************
*********** FIGURE 1  ***************
*************************************
*************************************
*************************************

******************************************************************************
* Characteristics of t-stats and PCCs of final data set
******************************************************************************

histogram tstat, bin(80) name(histTSTAT) fraction
gen tstat1 = (tstat<-2)
gen tstat2 = (tstat>=-2 & tstat<=2)
gen tstat3 = (tstat>2)
// NOTE: A lot of insignificant t-stats
sum tstat1 tstat2 tstat3

summ pcc, detail
histogram pcc, bin(80) name(histPCC) fraction

*************************************
*************************************
*************************************
*********** FIGURE 2  ***************
*************************************
*************************************
*************************************

histogram pcc if physicalhealth == 1, bin(80) name(histPhysHealth) fraction
summ pcc if physicalhealth == 1
histogram pcc if mentalhealth == 1, bin(80) name(histMentHealth) fraction 
summ pcc if mentalhealth == 1
histogram pcc if generalhealth == 1, bin(80) name(histGenHealth) fraction
summ pcc if generalhealth == 1

******************************************************************************
* Relationship between t-stats and PCCs
******************************************************************************
// This gives the correlation between pcc and tstat (= 0.65).
corr pcc tstat

******************************************************************************
* Relationship of PCCs over time
******************************************************************************
bysort id: egen PCCmed = median(pcc)
graph twoway (scatter PCCmed pubyear, msize(*0.5) msymbol(Oh) name(PCCtime)) (lfit PCCmed pubyear) 
// Not much change over time.


*************************************
*************************************
*************************************
************ TABLE 4 ****************
*************************************
*************************************
*************************************

// Study Type
summ journal
summ pubyear
// For our empirical analysis, we subtract off the mean
replace pubyear = pubyear - r(mean)


// Data Characteristics
sum individualdata aggregatedata individualsc
sum panel cs

// Countries
summ eastasia usa westnortheurope highincome othercountry

// Health Measure
summ physicalhealth mentalhealth generalhealth selfreported

// Social Capital Measures
summ cognitivestructual 
summ bondingbridginglinking
summ numberofscvariables
replace numberofscvariables = numberofscvariables - r(mean)

// Control Variables
summ age gender education maritalstatus income

// Estimation Method
summ ols orhr hlm fgls probitlogit orderedprobitlogit iv otherestimation
summ seother seols 
summ tnormal tcalculatedbypvalue tcalculatedbyci  

*************************************
*************************************
*************************************
************ TABLE 5 ****************
*************************************
*************************************
*************************************

// Generating transformed variables for FE and RE
gen fetstatR = r/seR
metareg r seR pubyear panel iv individualsc eastasia westnortheurope highincome othercountry ///
physicalhealth mentalhealth selfreported cognitivestructual bondingbridginglinking ///
numberofscvariables age gender education maritalstatus income ols orhr hlm ///
seother tnormal tcalculatedbypvalue, wsse(seR)
scalar tau2 =  e(tau2)
gen revarR = varR + tau2
gen reseR = sqrt(revarR)
gen retstatR = r/reseR

/*
******************************************************************************
* Determining the weight of individual studies (Fixed Effects)
******************************************************************************
gen fevarR = varR
gen feweightR = 1/fevarR
gen rR = r*feweightR
collapse (sum) rR feweightR, by (id)
gen avgR = rR/feweightR
gen avgsterrR = sqrt(1/feweightR)
sort id
metan avgR avgsterrR, fixed lcols(id) nobox
gen studyweight = _WT
summ studyweight, detail
gsort -studyweight
// NOTE: The top 3 studies account for approximately 70% of the weight! 
*/

/*
******************************************************************************
* Determining the weight of individual studies (Random Effects)
******************************************************************************
gen reweightR = 1/revarR
gen rR = r*reweightR
collapse (sum) rR reweightR, by (id)
gen avgR = rR/reweightR
gen avgsterrR = sqrt(1/reweightR)
sort id
metan avgR avgsterrR, random lcols(id) nobox
gen studyweight = _WT
summ studyweight, detail
gsort -studyweight
// NOTE: Random effects very evenly (too evenly?) weighted
*/

*************************************
*************************************
*************************************
*********** FIGURE 3  ***************
*************************************
*************************************
*************************************

******************************************************************************
* Funnel Plot for individual estimates
******************************************************************************
// FUNNEL GRAPH
metafunnel r seR, ylabel(#8) name(funnelA) //name(funnel, replace) nolines
metafunnel r seR if seR < 0.2, ylabel(#8) name(funnelAA) //name(funnel, replace) nolines
//NOTE: No dramattic evidence of asymmetry in estimates, hence no evidence of publication bias
//NOTE: But much evidence of random effects!

******************************************************************************
* Funnel Plot for study means
******************************************************************************
//FUNNEL GRAPH
by id, sort: egen meanr = mean(r)
by id, sort: egen meanseR = mean(seR)
metafunnel meanr meanseR, ylabel(#8) name(funnelB)  //name(funnel, replace) nolines


*************************************
*************************************
*************************************
************ TABLE 6 ****************
*************************************
*************************************
*************************************

******************************************************************************
* Calculating study weights based on number of estimates per study
******************************************************************************
bysort id: egen numberests = count(id)
tabulate numberests 
gen weight = 1/numberests


******************************************************************************
* FAT/PET
******************************************************************************

******************************************************************************
* FIXED EFFECTS
******************************************************************************
// Correcting for heteroskedasticity
gen feprecisionR = 1/seR

// Equal weight to each estimate (Column 1)
regress fetstatR feprecisionR, vce(cluster id)

// Equal weight to each study (Column (2)
regress fetstatR feprecisionR [pweight = weight], vce(cluster id)

******************************************************************************
* RANDOM EFFECTS 
******************************************************************************
// Correcting for heteroskedasticity
gen reprecisionR = 1/reseR
gen repubbiasR = seR/reseR

// Equal weight to each estimate (Column 3)
regress retstatR repubbiasR reprecisionR ,  noc vce(cluster id)

// Equal weight to each study (Column 4)
regress retstatR repubbiasR reprecisionR [pweight = weight], noc vce(cluster id)

******************************************************************************
* ESTIMATE OF OVERALL EFFECT WITHOUT CORRECTING FOR PUBLICATION BIAS
******************************************************************************

// RANDOM EFFECTS
// Equal weight to each estimate (Column 5) 
regress retstatR reprecisionR,  noc vce(cluster id)
//Equal weight to each study (Column 6)
regress retstatR reprecisionR [pweight = weight], noc vce(cluster id)


log close
 